## An object of class "DGEList"
## $counts
## R R R R R R E E E E E E
## ENSECAG00000002020 0 0 0 0 0 0 0 0 0 0 0 0
## ENSECAG00000006502 0 0 0 0 0 0 0 0 0 0 0 0
## ENSECAG00000023919 3 14 18 14 24 24 4 29 16 28 8 25
## ENSECAG00000002066 0 0 0 0 0 0 0 0 0 0 0 0
## ENSECAG00000006515 0 0 0 0 0 0 0 0 0 0 0 0
## 26949 more rows ...
##
## $samples
## group lib.size norm.factors
## 1 1 2130248 0.6357591
## 2 1 16246106 1.0234993
## 3 1 13764962 0.9870797
## 4 1 19432713 1.0187328
## 5 1 14973545 1.0570107
## 7 more rows ...
## [1] 12575 12
Specify the model to be fitted.
## groupE groupR
## 1 0 1
## 2 0 1
## 3 0 1
## 4 0 1
## 5 0 1
## 6 0 1
## 7 1 0
## 8 1 0
## 9 1 0
## 10 1 0
## 11 1 0
## 12 1 0
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
limma lmFit fits a linear model using weighted least squares for each gene:
reactable(coef(fit), bordered = T,highlight = T,height = 300, defaultPageSize = 15, searchable = T, filterable = T,defaultColDef = colDef(headerClass = "sort-header", minWidth = 85,
headerStyle = list(background = "#B7BABA")),columns = list( groupR = colDef(style = function(value) {
if (value > 0) {
color <- "#32B9A7"
} else if (value < 0) {
color <- "#9D9C9C"
} else {
color <- "#777"
}
list(color = color, fontWeight = "bold")}),
groupE=colDef(style = function(value) {
if (value > 0) {
color <- "#32B9A7"
} else if (value < 0) {
color <- "#9D9C9C"
} else {
color <- "#777"
}
list(color = color, fontWeight = "bold")
})))
Comparison between before exercise and after exercise
## Contrasts
## Levels groupE - groupR
## groupE 1
## groupR -1
Estimate contrast for each gene
Empirical Bayes smoothing of standard errors
What genes are most differentially expressed?
Filter the genes that have adjPval < 0.05
Filter genes with |LogFC| <= 0.5
Biomart to get the top 20 gene names
library(biomaRt)
e = useMart("ensembl")
ensembl = useEnsembl(biomart = "ensembl", dataset = "ecaballus_gene_ensembl")
horse = useDataset(dataset = "ecaballus_gene_ensembl",mart=e)
g.names = getBM(attributes = c("external_gene_name", "ensembl_gene_id"), filters = "ensembl_gene_id", values=top.table[1:20,1], mart=ensembl)PS: if you click on the gene, it will lead you it’s ensembl page.
reactable(top20[,-ncol(top20)], resizable = TRUE, bordered = TRUE, highlight = TRUE, showSortIcon = T,fullWidth = TRUE,wrap = F, searchable = TRUE,filterable = TRUE, defaultPageSize = 10, rownames = TRUE,
defaultColDef = colDef(headerClass = "sort-header", minWidth = 85,
headerStyle = list(background = "#B7BABA")), columns = list(
Gene = colDef(html = TRUE, cell = JS("function(cellInfo) {
// Render as a link
var url = 'https://asia.ensembl.org/Equus_caballus/Search/Results?q='+cellInfo.value+';site=ensembl;facet_species=Horse;page=1' + cellInfo.row.Gene + '_' + cellInfo.value
return '<a href=\"' + url + '\" target=\"_blank\">' + cellInfo.value + '</a>'
}"
))))## An object of class "DGEList"
## $counts
## R R R R R R E E E E E E
## ENSECAG00000002020 0 0 0 0 0 0 0 0 0 0 0 0
## ENSECAG00000006502 0 0 0 0 0 0 0 0 0 0 0 0
## ENSECAG00000023919 3 6 167 47 60 67 4 54 23 111 125 41
## ENSECAG00000002066 2 0 0 2 0 6 1 4 2 2 2 12
## ENSECAG00000006515 0 0 0 0 0 0 0 0 0 1 0 0
## 26949 more rows ...
##
## $samples
## group lib.size norm.factors
## 1 1 1214190 1.048777
## 2 1 1136857 1.003369
## 3 1 23500486 1.054662
## 4 1 23524764 1.028951
## 5 1 24598794 0.939532
## 7 more rows ...
## [1] 14250 12
Specify the model to be fitted.
## groupE groupR
## 1 0 1
## 2 0 1
## 3 0 1
## 4 0 1
## 5 0 1
## 6 0 1
## 7 1 0
## 8 1 0
## 9 1 0
## 10 1 0
## 11 1 0
## 12 1 0
## attr(,"assign")
## [1] 1 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
limma lmFit fits a linear model using weighted least squares for each gene:
reactable(coef(fit), bordered = T,highlight = T,height = 300, defaultPageSize = 15, searchable = T, filterable = T,defaultColDef = colDef(headerClass = "sort-header", minWidth = 85,
headerStyle = list(background = "#B7BABA")),columns = list( groupR = colDef(style = function(value) {
if (value > 0) {
color <- "#32B9A7"
} else if (value < 0) {
color <- "#9D9C9C"
} else {
color <- "#777"
}
list(color = color, fontWeight = "bold")}),
groupE=colDef(style = function(value) {
if (value > 0) {
color <- "#32B9A7"
} else if (value < 0) {
color <- "#9D9C9C"
} else {
color <- "#777"
}
list(color = color, fontWeight = "bold")
})))
Comparison between before exercise and after exercise
## Contrasts
## Levels groupE - groupR
## groupE 1
## groupR -1
Estimate contrast for each gene
Empirical Bayes smoothing of standard errors
What genes are most differentially expressed?
Filter the genes that have adjPval < 0.05
Filter genes with |LogFC| <= 0.5
Biomart to get the top 20 gene names
PS: if you click on the gene, it will lead you it’s ensembl page.
reactable(top20[,-ncol(top20)], resizable = TRUE, bordered = TRUE, highlight = TRUE, showSortIcon = T,fullWidth = TRUE,wrap = F, searchable = TRUE,filterable = TRUE, defaultPageSize = 10, rownames = TRUE,
defaultColDef = colDef(headerClass = "sort-header", minWidth = 85,
headerStyle = list(background = "#B7BABA")), columns = list(
Gene = colDef(html = TRUE, cell = JS("function(cellInfo) {
// Render as a link
var url = 'https://asia.ensembl.org/Equus_caballus/Search/Results?q='+cellInfo.value+';site=ensembl;facet_species=Horse;page=1' + cellInfo.row.Gene + '_' + cellInfo.value
return '<a href=\"' + url + '\" target=\"_blank\">' + cellInfo.value + '</a>'
}"
))))de1<- read.table("Desktop/GATK/DE/Fil DEs/DEHorse1filtered.txt", header = T)
de2 <- read.table("Desktop/GATK/DE/Fil DEs/DEHorse2filtered.txt", header = T)
de3 <- read.table("Desktop/GATK/DE/Fil DEs/DEHorse3filtered.txt", header = T)
de4<- read.table("Desktop/GATK/DE/Fil DEs/DEHorse4filtered.txt", header = T)
de5 <- read.table("Desktop/GATK/DE/Fil DEs/DEHorse5filtered.txt", header = T)
de6<- read.table("Desktop/GATK/DE/Fil DEs/DEHorse6filtered.txt", header = T)Annotated VCF by biomart
horse1=read.table("Desktop/pipeline/biomart/horse1.txt", header=F, sep = "\t")
horse2=read.table("Desktop/pipeline/biomart/horse2.txt", header=F, sep = "\t")
horse3=read.table("Desktop/pipeline/biomart/horse3.txt", header=F, sep = "\t")
horse4=read.table("Desktop/pipeline/biomart/horse4.txt", header=F, sep = "\t")
horse5=read.table("Desktop/pipeline/biomart/horse5.txt", header=F, sep = "\t")
horse6=read.table("Desktop/pipeline/biomart/horse6.txt", header=F, sep = "\t")filter DE by |logFC| < 0.5
FC_filter_function <- function(x)
{
x <- x[-which(abs(x$logFC)<0.5),]
return(x)
}
de_list <- list(de1,de2,de3,de4,de5,de6)
de_filtered= lapply(de_list, FC_filter_function)Top Genes that had a meaningful role in horse athletic performance from enrichr.
## [1] 64
novel_count <- function(horsex){
novel=length(which(horsex[,1]=="."))
return(novel)
}
known_count <-function(horsex){
known=length(which(horsex[,1]!="."))
return(known)
}
conseq<- function(horsexx){
total = nrow(horsexx)
others=100
intron_variantt = ((length(which(horsexx[,4]=="intron_variant"))))
intron_variant=paste0(floor(intron_variantt*100), "%")
downstream_gene_variantt=((length(which(horsexx[,4]=="downstream_gene_variant"))))
downstream_gene_variant=paste0(floor(downstream_gene_variantt*100), "%")
synonymous_variantt=((length(which(horsexx[,4]=="synonymous_variant"))))
synonymous_variant=paste0(floor(synonymous_variantt*100), "%")
upstream_gene_variant=((length(which(horsexx[,4]=="upstream_gene_variant"))))
upstream_gene_varian=paste0(floor(upstream_gene_variant*100), "%")
missense_variantt=((length(which(horsexx[,4]=="missense_variant"))))
missense_variant=paste0(floor(missense_variantt*100), "%")
three_prime_UTR_variantt=((length(which(horsexx[,4]=="3_prime_UTR_variant"))))
three_prime_UTR_variant=paste0(floor(three_prime_UTR_variantt*100), "%")
intergenic_variantt=((length(which(horsexx[,4]=="intergenic_variant"))))
intergenic_variant=paste0(floor(intergenic_variantt*100), "%")
splice_region_variantt=((length(which(horsexx[,4]=="splice_region_variant"))))
splice_region_variant=paste0(floor(splice_region_variantt*100), "%")
non_coding_transcript_variantt=((length(which(horsexx[,4]=="non_coding_transcript_variant"))))
non_coding_transcript_variant=paste0(floor(non_coding_transcript_variantt*100), "%")
otherss=others-intron_variantt-non_coding_transcript_variantt-splice_region_variantt-intergenic_variantt-three_prime_UTR_variantt-missense_variantt-upstream_gene_variant-synonymous_variantt-downstream_gene_variantt
others=paste0(floor(others*100), "%")
return(c(intron_variantt,downstream_gene_variantt, upstream_gene_variant,synonymous_variantt,missense_variantt, three_prime_UTR_variantt,intergenic_variantt,splice_region_variantt,non_coding_transcript_variantt, otherss))
}all_conseq=data.frame()
for(i in list(horse1,horse2,horse3,horse4,horse5,horse6)){
c=conseq(i)
all_conseq=rbind(all_conseq,c)
}
colnames(all_conseq)=c("intron_variant","downstream_gene_variant", "upstream_gene_varian","synonymous_variant","missense_variant", "3_prime_UTR_variant","intergenic_variant","splice_region_variant","non_coding_transcript_variant", "others")reactable(results, bordered = T,highlight = T,fullWidth = F,height = 300, defaultPageSize = 15, searchable = F, filterable = F, defaultColDef = colDef(headerClass = "sort-header", minWidth= 85,headerStyle = list(background = "#B7BABA")),columns = list( Horse = colDef(style = function(value) {
if (value ==1) {
color <- "#82E1D0"
} else if (value ==2) {
color <- "#7ACFBF"
}else if (value ==3) {
color <- "#69B4A7"
} else if (value ==4) {
color <- "#5FA599"
} else if (value ==5) {
color <- "#549085"
}else {
color <- "#4F877C"
}
list(color = color, fontWeight = "bold")})
))library(plotly)
fig <- plot_ly(cc1, labels = ~group, values = ~value, type ='pie', textinfo='percent',name='Horse 1',domain = list(x = c(0, 0.4), y = c(0.4, 1)) )
fig <- fig %>% layout(title = 'SNPs Distribution.',showlegend = T,
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
fig <- fig %>% add_pie(data = cc2, labels = ~group, values = ~value,
name = "Horse2", domain = list(x = c(0.25, 0.75), y = c(0, 0.5)))
fig <- fig %>% add_pie(data = cc3, labels = ~group, values = ~value,
name = "Horse3", domain = list(x = c(0.6, 1), y = c(0.4, 1)))
fig2 <- plot_ly(cc4, labels = ~group, values = ~value, type = 'pie', textinfo='percent',name='Horse 4',domain = list(x = c(0, 0.4), y = c(0.4, 1)) )
fig2<- fig2 %>% layout(title = 'SNPs Distribution.',showlegend = F,
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
fig2 <- fig2 %>% add_pie(data = cc5, labels = ~group, values = ~value,
name = "Horse 5", domain = list(x = c(0.25, 0.75), y = c(0, 0.5)))
fig2 <- fig2 %>% add_pie(data = cc6, labels = ~group, values = ~value,
name = "Horse 6", domain = list(x = c(0.6, 1), y = c(0.4, 1)))
figWe will only visualize 1 gene for simplicity. However, in practice we did generate plots for the 64 genes.
Gene’s LogFC for each horse.
library(ggplot2)
library(reshape2)
library(FSA)
library(plotly)
table2=matrix(NA,nrow = 1, ncol =6)
table2=table2[-1,]
for(i in "ENSECAG00000019476"){
d1=which(de_filtered[[1]]$Gene==i)
d2= which(de_filtered[[2]]$Gene==i)
d3= which(de_filtered[[3]]$Gene==i)
d4= which(de_filtered[[4]]$Gene==i)
d5=which(de_filtered[[5]]$Gene==i)
d6= which(de_filtered[[6]]$Gene==i)
if(length(d1)==0){
e1=0
}else{e1=de_filtered[[1]]$logFC[d1]}
if(length(d2)==0){
e2=0
}else{e2=de_filtered[[2]]$logFC[d2]}
if(length(d3)==0){
e3=0
}else{e3=de_filtered[[3]]$logFC[d3]}
if(length(d4)==0){
e4=0
}else{e4=de_filtered[[4]]$logFC[d4]}
if(length(d6)==0){
e6=0
}else{e6=de_filtered[[6]]$logFC[d6]}
if(length(d5)==0){
e5=0
}else{e5=de_filtered[[5]]$logFC[d5]}
ACTN1=c(e1, e2, e3,e4,e5,e6)
table2=rbind(table2,ACTN1)
}
melted=melt(table2)
colnames(melted)=c("genes", "Horse","LogFC")
if(length(which(melted$LogFC==0)!=0)){
melted=melted[-which(melted$LogFC==0),]}
melted$sample=as.factor(melted$Horse)
p<- ggplot(melted, aes(x=genes,y=LogFC, fill=sample))+scale_fill_brewer(palette = "Set3") + geom_bar(stat="identity",position="dodge", width = 0.9) +facet_wrap(vars(genes))The venn diagram for the variants per horse
library(venn)
for (i in 1:6) {
x=read.csv(paste("Desktop/pipeline/ACTN1A",i,".txt",sep =""),sep =" ",header = FALSE )
x[,1]=as.character(x[,1])
x[,2]=as.character(x[,2])
if(length(which(x[,1]=="."))>0){
arr=which(x[,1]==".")
for (j in arr) {
x[j,1]=strsplit(x[j,2],split ="|",fixed = TRUE)[[1]][10]
}
}
assign(paste("A",i,sep =""),x)
}
v=venn(
x = list(A1[,1],A2[,1],A3[,1],A4[,1],A5[,1],A6[,1]),
snames = c("A1","A2","A3","A4","A5","A6"),
zcolor =c("orange","blue","yellow","red","green","purple"),
ilabels = TRUE
)